knitr::opts_chunk$set(echo = TRUE)
# library(SingleCellExperiment)
library(scater)## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: scuttle
## Loading required package: ggplot2
library(scran)
library(dplyr)##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(patchwork)
library(BiocParallel)Import h5 files from cellranger
sce <- DropletUtils::read10xCounts(samples = file.path("data/",
c("IgG2A", "ICB", "Propranolol", "Propranolol_ICB"),
"/filtered_feature_bc_matrix"),
sample.names = c("IgG2A", "ICB", "Propranolol", "Propranolol_ICB"))
# set an order of the samples
sce$Sample <- factor(sce$Sample,
levels = c("IgG2A",
"ICB",
"Propranolol",
"Propranolol_ICB"))
sce## class: SingleCellExperiment
## dim: 32286 71443
## metadata(1): Samples
## assays(1): counts
## rownames(32286): ENSMUSG00000051951 ENSMUSG00000089699 ...
## ENSMUSG00000095041 YFP
## rowData names(3): ID Symbol Type
## colnames: NULL
## colData names(2): Sample Barcode
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
QC
# Find mito genes
is.mito <- grepl("^mt-", rowData(sce)$Symbol)
unfiltered <- sce
stats <- perCellQCMetrics(sce, subsets=list(Mito=which(is.mito)))
qc <- quickPerCellQC(stats, percent_subsets="subsets_Mito_percent")
sce <- sce[,!qc$discard]
colData(unfiltered) <- cbind(colData(unfiltered), stats)
unfiltered$discard <- qc$discard
gridExtra::grid.arrange(
plotColData(unfiltered, y="sum", colour_by="discard") +
scale_y_log10() + ggtitle("Total count"),
plotColData(unfiltered, y="detected", colour_by="discard") +
scale_y_log10() + ggtitle("Detected features"),
plotColData(unfiltered, y="subsets_Mito_percent",
colour_by="discard") + ggtitle("Mito percent"),
ncol=2
)plotColData(unfiltered, x="sum", y="subsets_Mito_percent",
colour_by="discard") + scale_x_log10() + xlab("Total count")rm(unfiltered)Normalization & variance-modelling
#--- normalization ---#
set.seed(101000110)
clusters <- quickCluster(sce)
sce <- computeSumFactors(sce, clusters=clusters)
sce <- logNormCounts(sce)
plot(librarySizeFactors(sce), sizeFactors(sce), pch=16,
xlab="Library size factors", ylab="Deconvolution factors", log="xy")#--- variance-modelling ---#
set.seed(00010101)
dec.sce <- modelGeneVarByPoisson(sce)
top.sce <- getTopHVGs(dec.sce, prop=0.1)
plot(dec.sce$mean, dec.sce$total, pch=16, cex=0.5,
xlab="Mean of log-expression", ylab="Variance of log-expression")
curfit <- metadata(dec.sce)
curve(curfit$trend(x), col='dodgerblue', add=TRUE, lwd=2)Dimensional reduction
library(BiocSingular)
set.seed(101010011)
sce <- denoisePCA(sce, technical=dec.sce, subset.row=top.sce)
#sce <- runTSNE(sce, dimred="PCA")
sce <- runUMAP(sce, dimred="PCA",
metric = "cosine",
min_dist = 0.3,
n_neighbors = 30)Clustering
resolution_parameter was set to 0.05. This results in 11 clusters.
snn.gr <- buildSNNGraph(sce, use.dimred="PCA", k=25)
colLabels(sce) <- factor(igraph::cluster_leiden(snn.gr, resolution_parameter = 0.05)$membership)
table(colLabels(sce))##
## 1 2 3 4 5 6 7 8 9 10 11
## 13406 11160 21118 1958 3448 8641 926 1185 731 2474 154
Some diagnostic plots
plotUMAP(sce, colour_by="label", text_by="label") + coord_fixed()plotUMAP(sce, colour_by="Sample") + coord_fixed()plotUMAP(sce, colour_by="label", other_fields="Sample") + facet_wrap(~Sample) + coord_fixed()plotUMAP(sce, colour_by="Cd8a", swap_rownames="Symbol") + coord_fixed()YFP
plotUMAP(sce, colour_by="YFP", swap_rownames="Symbol") + coord_fixed()Save RDS
saveRDS(sce, file="data/sce_preprocessed.rds")Session info
sessionInfo()## R version 4.1.3 (2022-03-10)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora Linux 36 (Container Image)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libflexiblas.so.3.3
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices datasets utils
## [8] methods base
##
## other attached packages:
## [1] BiocSingular_1.8.1 BiocParallel_1.26.2
## [3] patchwork_1.1.1 dplyr_1.0.7
## [5] scran_1.20.1 scater_1.20.1
## [7] ggplot2_3.3.5 scuttle_1.2.1
## [9] SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0
## [11] Biobase_2.52.0 GenomicRanges_1.44.0
## [13] GenomeInfoDb_1.28.2 IRanges_2.26.0
## [15] S4Vectors_0.30.0 BiocGenerics_0.38.0
## [17] MatrixGenerics_1.4.3 matrixStats_0.60.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 RcppAnnoy_0.0.19
## [3] tools_4.1.3 bslib_0.2.5.1
## [5] utf8_1.2.2 R6_2.5.1
## [7] irlba_2.3.3 HDF5Array_1.20.0
## [9] vipor_0.4.5 uwot_0.1.10
## [11] DBI_1.1.1 colorspace_2.0-2
## [13] rhdf5filters_1.4.0 withr_2.4.2
## [15] tidyselect_1.1.1 gridExtra_2.3
## [17] compiler_4.1.3 BiocNeighbors_1.10.0
## [19] DelayedArray_0.18.0 labeling_0.4.2
## [21] bookdown_0.23 sass_0.4.0
## [23] scales_1.1.1 stringr_1.4.0
## [25] digest_0.6.27 R.utils_2.10.1
## [27] rmarkdown_2.10 XVector_0.32.0
## [29] pkgconfig_2.0.3 htmltools_0.5.2
## [31] sparseMatrixStats_1.4.2 highr_0.9
## [33] fastmap_1.1.0 limma_3.48.3
## [35] rlang_0.4.11 DelayedMatrixStats_1.14.3
## [37] farver_2.1.0 jquerylib_0.1.4
## [39] generics_0.1.0 jsonlite_1.7.2
## [41] R.oo_1.24.0 RCurl_1.98-1.4
## [43] magrittr_2.0.1 GenomeInfoDbData_1.2.6
## [45] Matrix_1.3-4 Rhdf5lib_1.14.2
## [47] Rcpp_1.0.7 ggbeeswarm_0.6.0
## [49] munsell_0.5.0 fansi_0.5.0
## [51] viridis_0.6.1 R.methodsS3_1.8.1
## [53] lifecycle_1.0.0 stringi_1.7.4
## [55] yaml_2.2.1 edgeR_3.34.0
## [57] zlibbioc_1.38.0 rhdf5_2.36.0
## [59] grid_4.1.3 dqrng_0.3.0
## [61] forcats_0.5.1 crayon_1.4.1
## [63] lattice_0.20-44 cowplot_1.1.1
## [65] beachmat_2.8.1 locfit_1.5-9.4
## [67] metapod_1.0.0 knitr_1.33
## [69] pillar_1.6.2 igraph_1.2.6.9118
## [71] codetools_0.2-18 ScaledMatrix_1.0.0
## [73] glue_1.4.2 evaluate_0.14
## [75] renv_0.15.4 BiocManager_1.30.16
## [77] vctrs_0.3.8 rmdformats_1.0.2
## [79] gtable_0.3.0 purrr_0.3.4
## [81] assertthat_0.2.1 xfun_0.25
## [83] DropletUtils_1.12.2 rsvd_1.0.5
## [85] RSpectra_0.16-0 viridisLite_0.4.0
## [87] tibble_3.1.4 beeswarm_0.4.0
## [89] cluster_2.1.2 bluster_1.2.1
## [91] statmod_1.4.36 ellipsis_0.3.2